From c6dc2fafc02cd1a5593ed2825dc0f7f55a6ac87e Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Thu, 1 Jun 2017 17:49:56 +0200
Subject: [PATCH 1/5] Add Spanish and move example sentences to meta

---
 website/_harp.json             | 16 ++++++++++++++--
 website/docs/usage/index.jade  |  1 +
 website/docs/usage/models.jade |  3 +--
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/website/_harp.json b/website/_harp.json
index 8c16ccc16..25ad3c5d2 100644
--- a/website/_harp.json
+++ b/website/_harp.json
@@ -77,7 +77,8 @@
             { "id": "model", "title": "Models", "multiple": true, "options": [
                 { "id": "en", "title": "English", "meta": "50MB" },
                 { "id": "de", "title": "German", "meta": "645MB" },
-                { "id": "fr", "title": "French", "meta": "1.33GB" }]
+                { "id": "fr", "title": "French", "meta": "1.33GB" },
+                { "id": "es", "title": "Spanish", "meta": "377MB"}]
             }
         ],
 
@@ -85,7 +86,8 @@
             { "id": "lang", "title": "Language", "options": [
                 { "id": "en", "title": "English", "checked": true },
                 { "id": "de", "title": "German" },
-                { "id": "fr", "title": "French" }]
+                { "id": "fr", "title": "French" },
+                { "id": "es", "title": "Spanish" }]
             },
             { "id": "load", "title": "Loading style", "options": [
                 { "id": "spacy", "title": "Use spacy.load()", "checked": true, "help": "Use spaCy's built-in loader to load the model by name." },
@@ -108,9 +110,19 @@
             ],
             "fr": [
                 { "id": "fr_depvec_web_lg", "lang": "French", "feats": [1, 1, 0, 1], "size": "1.33 GB", "license": "CC BY-NC" }
+            ],
+            "es": [
+                { "id": "es_core_web_md", "lang": "Spanish", "feats": [1, 1, 1, 1], "size": "377 MB", "license": "CC BY-SA"}
             ]
         },
 
+        "EXAMPLE_SENTENCES": {
+            "en": "This is a sentence.",
+            "de": "Dies ist ein Satz.",
+            "fr": "C'est une phrase.",
+            "es": "Esto es una frase."
+        },
+
         "ALPHA": true,
         "V_CSS": "1.6",
         "V_JS": "1.2",
diff --git a/website/docs/usage/index.jade b/website/docs/usage/index.jade
index c79c689a4..d3deaa17e 100644
--- a/website/docs/usage/index.jade
+++ b/website/docs/usage/index.jade
@@ -40,6 +40,7 @@ p
     +qs({model: 'en'}) python -m spacy download en
     +qs({model: 'de'}) python -m spacy download de
     +qs({model: 'fr'}) python -m spacy download fr
+    +qs({model: 'es'}) python -m spacy download es
 
 +h(2, "installation") Installation instructions
 
diff --git a/website/docs/usage/models.jade b/website/docs/usage/models.jade
index a837b4d29..bc0f14e01 100644
--- a/website/docs/usage/models.jade
+++ b/website/docs/usage/models.jade
@@ -18,7 +18,6 @@ p
     |  skew, which might decrease your accuracy.
 
 +quickstart(QUICKSTART_MODELS, "Quickstart", "Install a default model, get the code to load it from within spaCy and an example to test it. For more options, see the section on available models below.")
-    - var examples = {en: "This is a sentence.", de: "Dies ist ein Satz.", fr: "C'est une phrase."}
     for models, lang in MODELS
         - var package = (models.length == 1) ? models[0] : models.find(function(m) { return m.def })
         +qs({lang: lang}) python -m spacy download #{lang}
@@ -26,7 +25,7 @@ p
         +qs({lang: lang, load: "module"}, "python") import #{package.id}
         +qs({lang: lang, load: "module"}, "python") nlp = #{package.id}.load()
         +qs({lang: lang, load: "spacy"}, "python") nlp = spacy.load('#{lang}')
-        +qs({lang: lang, config: "example"}, "python") doc = nlp(u"#{examples[lang]}")
+        +qs({lang: lang, config: "example"}, "python") doc = nlp(u"#{EXAMPLE_SENTENCES[lang]}")
         +qs({lang: lang, config: "example"}, "python") print([(w.text, w.pos_) for w in doc])
 
 +h(2, "available") Available models

From 6c908700c45f0a109e8fd1a66a2ecce0d172c93e Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Thu, 1 Jun 2017 18:20:33 +0200
Subject: [PATCH 2/5] Add alpha badge

---
 website/assets/img/graphics.svg | 11 +++++++++++
 website/index.jade              |  2 +-
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/website/assets/img/graphics.svg b/website/assets/img/graphics.svg
index c24473b4c..a449c3d04 100644
--- a/website/assets/img/graphics.svg
+++ b/website/assets/img/graphics.svg
@@ -1,5 +1,16 @@
 <svg style="position: absolute; width: 0; height: 0;" width="0" height="0" version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
     <defs>
+        <symbol id="v2alpha" viewBox="0 0 200 111">
+            <title>spaCy v2.0.0 alpha</title>
+            <path fill="#ddd" d="M183.3 89.2l-164.6-40-1-29.2 164.6 40M3.8 106.8l41.6-1.4-1-29.2-41.6 1.4L13.2 92"/>
+            <path fill="#a3cad3" d="M45.4 105.4L19.6 94.6l25.4-1"/>
+            <path fill="#ddd" d="M196.6 2L155 3.4l1 29.2 41.6-1.4L187.2 17"/>
+            <path fill="#a3cad3" d="M155 3.4l25.8 10.8-25.4 1"/>
+            <path fill="#fff" d="M17.6 19.4l163-5.6 1 29.2-163 5.6zM19.2 65.6l163-5.6 1 29.2-163 5.6z"/>
+            <path fill="#008EBC" d="M45.8 29h-3.6v-2.4l10-.4.2 2.5h-3.6l.4 10.8h-3L45.8 29zM62 39L59 34.5h-1.6l.2 5h-3l-.5-13.2L59 26c3 0 5.2.8 5.3 4 0 1.8-.8 3-2.2 3.8l3.3 5.2H62zm-4.5-6.8H59c1.6-.2 2.4-.8 2.3-2 0-1.4-1-1.8-2.5-1.8h-1.5l.2 3.8zM69 34.2l-4.3-8.4H68l1.2 3 1.2 2.8c.4-1 .8-2 1-3l1.2-3 3-.2L72 34l.2 4.7h-3l-.2-4.5zM79.5 25.3h3.2l1.8 6 1.2 4.2c.5-1.5.7-2.8 1-4.3L88 25h3L87.7 38H84l-4.5-13zM92.4 25l8.3-.4V27l-5.2.3V30l4.6-.3.2 2.5-4.5.2v3l5.6-.2v2.5L93 38l-.6-13zM111 37.4l-2.6-4.7h-1.6l.2 5h-3l-.5-13.2 4.8-.2c2.8 0 5 .8 5.2 4 0 1.8-.8 3-2.2 3.8l3.2 5.3H111zm-4.3-7h1.5c1.6 0 2.4-.7 2.3-2 0-1.3-1-1.7-2.5-1.7h-1.5l.2 3.8zM116.8 33.5c1 .8 2.2 1.3 3.3 1.3 1.3 0 2-.5 2-1.3s-1-1-2-1.5l-1.8-.7c-1.4-.5-2.7-1.6-2.8-3.5 0-2.2 1.8-4 4.6-4 1.5-.2 3 .4 4.3 1.5l-1.4 2c-1-.7-1.8-1-3-1-1 0-1.6.4-1.5 1.2 0 .8 1 1 2 1.5l1.8.6c1.6.6 2.7 1.6 2.7 3.5 0 2.3-1.7 4.2-4.8 4.4-1.7 0-3.6-.5-5-1.7l1.6-2.2zM126.8 23.7h3l.5 13-3 .2-.5-13.3zM132.5 30c0-4.3 2.2-7 5.8-7 3.6 0 6 2.3 6.2 6.6 0 4.3-2.2 7-5.8 7-3.5.3-6-2.3-6.2-6.6zm9-.3c-.2-2.6-1.4-4.2-3.2-4-1.8 0-3 1.6-2.8 4.2 0 2.5 1.3 4.2 3 4 2 0 3-1.6 3-4.3zM146.7 23h3l3.8 6.3 1.4 3c-.2-1.5-.5-3.3-.5-5l-.2-4.6h2.8l.6 13-3 .2-3.8-6.6-1.4-2.8c0 1.5.4 3.2.4 4.8l.2 4.7-3 .2-.3-13.2z"/>
+            <path fill="#1A1E23" d="M50.2 84.7c3.2-3.2 5.4-5.5 5.3-7.3 0-1.3-.8-2-2-2-.8 0-1.5.8-2 1.5l-1.8-1.6c1.2-1.4 2.4-2 4.2-2.2 2.4 0 4.2 1.5 4.3 4 0 2-2 4.4-4 6.7.7-.2 1.6-.3 2.2-.3H59l.2 2.4-9 .4v-1.7zM63 82.4c1 0 2 .7 2 1.8 0 1-.7 2-1.7 2s-1.8-.8-2-2c0-1 .7-1.8 1.8-1.8zM66.7 79.3c-.2-4.4 1.6-6.7 4.4-6.8 3 0 4.8 2 5 6.5s-1.7 6.8-4.5 7c-2.7 0-4.6-2.3-4.8-6.7zM73 79c0-3.4-.8-4.2-1.8-4-1 0-1.8.7-1.6 4.3 0 3.5 1 4.4 2 4.3 1 0 1.6-1 1.5-4.5zM79.8 81.8c1 0 1.8.7 2 1.8 0 1-.8 2-1.8 2s-1.8-.8-2-2c0-1 .8-1.7 1.8-1.8zM83.5 78.7C83.3 74.3 85 72 88 72c2.7-.2 4.6 2 4.7 6.4s-1.6 6.8-4.4 7c-2.8 0-4.7-2.3-4.8-6.7zm6.3-.2c0-3.5-1-4.3-2-4.2-1 0-1.7.8-1.5 4.4 0 3.5 1 4.4 2 4.3 1 0 1.7-1 1.5-4.5zM105.5 81.3h-4l-.7 3.3h-3l3.7-13.2h3.6l4.7 13h-3.2l-1-3zm-.7-2.3l-.4-1.2-1.2-4.2-1 4.3-.3 1h2.8zM110.5 71h3l.4 10.7 5-.2.2 2.5-8.2.3-.5-13.2zM121 70.7l4.7-.2c3 0 5.2 1 5.3 4 0 3.2-2.2 4.7-5 4.7h-1.8l.2 4.6h-3l-.5-13zm4.7 6.2c1.6-.2 2.4-1 2.4-2.3 0-1.4-.8-2-2.4-1.8H124v4h1.7zM133 70.3h3l.3 5 4.5-.2-.2-5h3l.5 13-3 .2v-5.5l-4.6.2.2 5.4h-3l-.5-13zM153.3 79.7h-4l-.7 3.3h-3l3.7-13.2h3.6l4.5 13h-3.2l-1-3zm-.7-2.3l-.4-1.2L151 72l-1 4.3-.3 1.2h3z"/>
+        </symbol>
+
         <symbol id="usersurvey" viewBox="0 0 200 111">
             <title>spaCy user survey 2017</title>
             <path fill="#ddd" d="M183.3 89.2l-164.6-40-1-29.2 164.6 40M3.8 106.8l41.6-1.4-1-29.2-41.6 1.4L13.2 92"/>
diff --git a/website/index.jade b/website/index.jade
index b4e987cfb..741db53cf 100644
--- a/website/index.jade
+++ b/website/index.jade
@@ -11,7 +11,7 @@ include _includes/_mixins
     h2.c-landing__title.o-block.u-heading-1
         | in Python
 
-    +landing-badge("https://survey.spacy.io", "usersurvey", "Take the user survey!")
+    +landing-badge(gh("spaCy") + "/releases/tag/v2.0.0-alpha", "v2alpha", "Try spaCy v2.0.0 alpha!")
 
     +grid.o-content
         +grid-col("third").o-card

From 8bee34126dfd2735485dc82134b23547438394bd Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Thu, 1 Jun 2017 18:22:35 +0200
Subject: [PATCH 3/5] Update model size

---
 website/_harp.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/website/_harp.json b/website/_harp.json
index 25ad3c5d2..07afcbaa2 100644
--- a/website/_harp.json
+++ b/website/_harp.json
@@ -78,7 +78,7 @@
                 { "id": "en", "title": "English", "meta": "50MB" },
                 { "id": "de", "title": "German", "meta": "645MB" },
                 { "id": "fr", "title": "French", "meta": "1.33GB" },
-                { "id": "es", "title": "Spanish", "meta": "377MB"}]
+                { "id": "es", "title": "Spanish", "meta": "378MB"}]
             }
         ],
 
@@ -112,7 +112,7 @@
                 { "id": "fr_depvec_web_lg", "lang": "French", "feats": [1, 1, 0, 1], "size": "1.33 GB", "license": "CC BY-NC" }
             ],
             "es": [
-                { "id": "es_core_web_md", "lang": "Spanish", "feats": [1, 1, 1, 1], "size": "377 MB", "license": "CC BY-SA"}
+                { "id": "es_core_web_md", "lang": "Spanish", "feats": [1, 1, 1, 1], "size": "378 MB", "license": "CC BY-SA"}
             ]
         },
 

From 9064fbbf1ecef918c10f9293447a8fd3fd2015c6 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Thu, 1 Jun 2017 18:57:02 +0200
Subject: [PATCH 4/5] Fix empty arguments in mixins

---
 website/_includes/_mixins.jade | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/website/_includes/_mixins.jade b/website/_includes/_mixins.jade
index ce8bfad4e..9de43b092 100644
--- a/website/_includes/_mixins.jade
+++ b/website/_includes/_mixins.jade
@@ -107,13 +107,13 @@ mixin button(url, trusted, ...style)
     height   - [integer] optional height to clip code block to
 
 mixin code(label, language, icon, height)
-    pre.c-code-block.o-block(class="lang-#{(language || DEFAULT_SYNTAX)}" class=icon ? "c-code-block--has-icon" : "" style=height ? "height: #{height}px" : "")&attributes(attributes)
+    pre.c-code-block.o-block(class="lang-#{(language || DEFAULT_SYNTAX)}" class=icon ? "c-code-block--has-icon" : null style=height ? "height: #{height}px" : null)&attributes(attributes)
         if label
             h4.u-text-label.u-text-label--dark=label
 
         if icon
             - var classes = {'accept': 'u-color-green', 'reject': 'u-color-red'}
-            .c-code-block__icon(class=classes[icon] || "" class=classes[icon] ? "c-code-block__icon--border" : "")
+            .c-code-block__icon(class=classes[icon] || null class=classes[icon] ? "c-code-block__icon--border" : null)
                 +icon(icon, 18)
 
         code.c-code-block__content

From 307d615c5f81fa4bbc8de432c468f7c37d5a3dc9 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Thu, 1 Jun 2017 12:18:36 -0500
Subject: [PATCH 5/5] Fix serialization for tagger when tag_map has changed

---
 spacy/pipeline.pyx | 50 +++++++++++++++++++++++++++++++++++++---------
 spacy/vocab.pyx    |  5 -----
 2 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index aeec2dba4..d4d94a476 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -10,6 +10,7 @@ cimport numpy as np
 import cytoolz
 import util
 from collections import OrderedDict
+import ujson
 
 from thinc.api import add, layerize, chain, clone, concatenate, with_flatten
 from thinc.neural import Model, Maxout, Softmax, Affine
@@ -33,6 +34,7 @@ from .gold cimport GoldParse
 from .morphology cimport Morphology
 from .vocab cimport Vocab
 from .syntax import nonproj
+from .compat import json_dumps
 
 from .attrs import ID, LOWER, PREFIX, SUFFIX, SHAPE, TAG, DEP, POS
 from ._ml import rebatch, Tok2Vec, flatten, get_col, doc2feats
@@ -308,7 +310,7 @@ class NeuralTagger(object):
             if self.model is True:
                 token_vector_width = util.env_opt('token_vector_width', 128)
                 self.model = self.Model(self.vocab.morphology.n_tags, token_vector_width)
-                self.model.from_bytes(b)
+            self.model.from_bytes(b)
         deserialize = OrderedDict((
             ('vocab', lambda b: self.vocab.from_bytes(b)),
             ('model', lambda b: load_model(b)),
@@ -317,17 +319,47 @@ class NeuralTagger(object):
         return self
 
     def to_disk(self, path, **exclude):
-        serialize = {
-            'model': lambda p: p.open('wb').write(self.model.to_bytes()),
-            'vocab': lambda p: self.vocab.to_disk(p)
-        }
+        """Save the vocab, tag map and model through util.to_disk."""
+        # Named writers instead of lambdas: a `with` block closes each file
+        # handle explicitly once the data has been written.
+        def save_tag_map(p):
+            with p.open('w') as file_:
+                file_.write(json_dumps(self.vocab.morphology.tag_map))
+
+        def save_model(p):
+            with p.open('wb') as file_:
+                file_.write(self.model.to_bytes())
+
+        serialize = OrderedDict((
+            ('vocab', lambda p: self.vocab.to_disk(p)),
+            ('tag_map', save_tag_map),
+            ('model', save_model),
+        ))
         util.to_disk(path, serialize, exclude)
 
     def from_disk(self, path, **exclude):
-        deserialize = {
-            'model': lambda p: self.model.from_bytes(p.open('rb').read()),
-            'vocab': lambda p: self.vocab.from_disk(p)
-        }
+        """Load the vocab, tag map and model through util.from_disk."""
+        def load_model(p):
+            if self.model is True:
+                token_vector_width = util.env_opt('token_vector_width', 128)
+                self.model = self.Model(self.vocab.morphology.n_tags, token_vector_width)
+            with p.open('rb') as file_:
+                self.model.from_bytes(file_.read())
+
+        def load_tag_map(p):
+            with p.open() as file_:
+                tag_map = ujson.loads(file_.read())
+            self.vocab.morphology = Morphology(
+                self.vocab.strings, tag_map=tag_map,
+                lemmatizer=self.vocab.morphology.lemmatizer)
+
+        # 'tag_map' is listed before 'model': load_model sizes a new model
+        # from morphology.n_tags, which load_tag_map replaces.
+        deserialize = OrderedDict((
+            ('vocab', lambda p: self.vocab.from_disk(p)),
+            ('tag_map', load_tag_map),
+            ('model', load_model),
+        ))
         util.from_disk(path, deserialize, exclude)
         return self
 
diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index b3410a02b..d42e8951b 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -315,7 +315,6 @@ cdef class Vocab:
         getters = OrderedDict((
             ('strings', lambda: self.strings.to_bytes()),
             ('lexemes', lambda: self.lexemes_to_bytes()),
-            ('tag_map', lambda: self.morphology.tag_map),
         ))
         return util.to_bytes(getters, exclude)
 
@@ -326,13 +325,9 @@ cdef class Vocab:
         **exclude: Named attributes to prevent from being loaded.
         RETURNS (Vocab): The `Vocab` object.
         """
-        def set_tag_map(tag_map):
-            self.morphology = Morphology(self.strings, tag_map,
-                                        self.morphology.lemmatizer)
         setters = OrderedDict((
             ('strings', lambda b: self.strings.from_bytes(b)),
             ('lexemes', lambda b: self.lexemes_from_bytes(b)),
-            ('tag_map', lambda b: set_tag_map(b))
         ))
         return util.from_bytes(bytes_data, setters, exclude)